## "China's Ideological Spectrum"
## by Jennifer Pan and Yiqing Xu

## #################################
## ## Individual-level Correlations
## #################################

### WARNING - this part takes fairly long processing time
### Results can be directly loaded from intermediate file (line 81)

## rm(list=ls(all=TRUE)) 
## load("output/result_cfa.RData")
## load("output/result_cfa_boot.RData")

## ## income
## d$inc<-d$income
## d$inc[which(d$inc%in%c(1,2))]<-1.5 # 0-25K, 25-50K --> 0-50K
## d$inc[which(d$inc%in%c(3,4,5))]<-3.5 # 50-75K, 75-100K, 100-150K --> 50-150K
## (out <- aggregate(lt1 ~ inc, data = d, mean))

## ## education
## d[which(d$educ==1),]$educ <- 2
## (out <- aggregate(lt1 ~ educ, data = d[which(d$age>=40 & d$female ==0),], mean))

## ## stacking bootstrapped estimates together
## nobs <- dim(result.lv)[1]
## dim3 <- dim(result.lv)[3]
## ndim <- 3
## s <- matrix(NA, nobs * dim3, ndim)
## for (i in 1:nobs) {
##     s[((i-1)*dim3+1):(i*dim3),] <- t(result.lv[i,,]) 
## }
## s <- data.frame(s)
## colnames(s) <- c("lt1","lt2","lt3")

## ## bootstrap function
## mean.boot <- function(X, nsims=100) {
    
##     ## variable names
##     ## X  grouping variable
##     ## nsims  number of simulations
    
##     nX<-unique(d[,X]) # unique levels
    
##     ## create IDs
##     id.obs <-1:nobs
##     id.dim3 <- 1:dim3
##     ndim <- 3
    
##     ## bootstrap
##     results<-array(NA,dim = c(length(nX),ndim,nsims))
##     for (i in 1:nsims) {
##         id.smp <- sample(1:nobs, nobs, replace = TRUE) # which unit
##         ss <- d[id.smp, ]
##         results[,,i]<-do.call("rbind",by(ss,ss[,X],function(mat) apply(mat[,c("lt1","lt2","lt3")],2,mean)))
##         if (i%%10==0) cat(i) else cat(".")
##     }
    
##     ## summary
##     output<-array(NA,dim = c(length(nX), ndim,4)) ## mean, se, CI_l, CI_u 
##     output[,,1]<-do.call("rbind",by(d,d[,X],function(mat) apply(mat[,c("lt1","lt2","lt3")],2,mean)))
##     output[,,2]<-apply(results,c(1,2),sd)
##     output[,,3]<-apply(results,c(1,2), quantile,c(0.025))
##     output[,,4]<-apply(results,c(1,2),quantile,c(0.975))
##     return(output)
## }

## ## bootstrap
## nsims <- 1000 
## ses.female <- mean.boot(X="female", nsims = nsims)
## ses.edu <- mean.boot(X="educ", nsims= nsims)
## ses.income<-mean.boot(X="inc", nsims= nsims)
## ses.age <- mean.boot(X="age", nsims= nsims)
## save(ses.female, ses.edu, ses.income, ses.age,
##      file="output/result_cfa_ind.RData")

###############################
# Figures 10, 11. Correlations
###############################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_ind.RData")

mycol = c(1, "gray50", "gray80")
mypch = c(16, 17, 15)

## Education
pdf("graphs/ses_educ.pdf")
output <- ses.edu
offset <- seq(-0.15,0.15,length.out=3)
par(mar=c(4.5,4.5,2,2))
plot(1, type="n",ylab="", xlab="Education", ylim=c(-0.6,0.6), xlim=c(0.5,3.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:3, labels=c("Below\nCollege","College\n","Above\nCollege"),
     cex.lab = 1.5, cex.axis=1.2, tick=FALSE,line=1)
axis(1,at=1:3, labels = NA, tick = TRUE)
for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
for(i in seq(-0.5,3.5,by=0.25)) abline(v=i,col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1:3) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism",
                             "Pro-Market/Non-Traditional Values",
                             "Nationalism * (-1)"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
graphics.off()


## Income
pdf("graphs/ses_income.pdf")
output <- ses.income
offset <- seq(-0.15,0.15,length.out=3)
par(mar=c(4.5,4.5,2,2))
plot(1, type="n",ylab="", xlab="Annual Income", ylim=c(-0.6,0.6), xlim=c(0.5,4.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:4, labels=c("0-50K","50-150K","150-300K",">300K"), cex.axis=1.5,tick=F,line=0)
axis(1,at=1:4, labels = NA, tick = TRUE)
for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
for(i in seq(-0.5,4.5,by=0.25)) abline(v=i,col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1:3) { # three latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism",
                             "Pro-Market/Non-Traditional Values",
                             "Nationalism * (-1)"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
graphics.off()



## Age
pdf("graphs/ses_age.pdf",width = 20)
output <- ses.age
legends = c("Political Liberalism","Pro-Market/Non-Traditional Values","Nationalism * (-1)")
par(mar=c(5,4.5,2,2), mfcol = c(1,3))
for (k in 1:3) {
    plot(1, type="n",ylab="", xlab="Age", ylim=c(-0.6,0.6), xlim=c(-1,45),
         cex.lab=2.5, cex.axis=2, xaxt="n")
    abline(h=0)
    axis(1,at=seq(1,43, by = 3), labels=seq(18, 60, by =3),
         cex.axis= 2,tick=TRUE,line=0)
    for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
    for(i in seq(-2,45,by=3)) abline(v=i,col="#AAAAAA50",lwd=1)
    n<-dim(output)[1]
    for (i in 1:n) {lines(c(i,i),c(output[i,k,3],output[i,k,4]),
                          lwd= 4, col = 1)}
    points(c(1:n),output[,k,1],pch=mypch[k],cex=2, col = 1)
    title(legends[k], line = -3, cex.main = 2.5)
}
graphics.off()


## Gender (not in the paper)
## pdf("graphs/ses_gender.pdf")
## output <- ses.female
## offset <- seq(-0.15,0.15,length.out=3)
## par(mar=c(4.5,4.5,2,2))
## plot(1, type="n",ylab="", xlab="Gender", ylim=c(-0.4,0.4), xlim=c(0.5,2.5),
##      cex.lab=1.5, cex.axis=1.5, xaxt="n")
## abline(h=0)
## axis(1,at=1:2, labels=c("Male", "Female"), tick = FALSE, cex.axis=1.5)
## axis(1,at=1:3, labels = NA, tick = TRUE)
## for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA80",lwd=1)
## for(i in seq(-0.5,2.5,by=0.25)) abline(v=i,col="#AAAAAA80",lwd=1)
## n<-dim(output)[1]
## for (k in 1:3) { # four latent variables
##     for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
##     points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
## }
## legend("topright", legend = c("Political Liberalism",
##                               "Pro-Market/Non-Traditional Values",
##                               "Nationalism * (-1)"),
##        col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
## graphics.off()

